* Test for effect of the CAP using USDA state-level admin data
* 4/29/2025

clear all
set more off

program main
	load_usda_data
	load_population_data
	
	run_regressions
	percent_effects
end

program load_usda_data
	* append years
	use "$intermediate/cleaned_usda/snap_ssi_2000.dta", clear
	ren snap_ssi_count snap_ssi_2000
	forval year = 2001/2016 {
		di `year'
		merge 1:1 state using "$intermediate/cleaned_usda/snap_ssi_`year'.dta", assert(1 2 3) keep(3) nogen
		ren snap_ssi_count snap_ssi_`year'
	}
	
	* clean data
	foreach var of varlist snap_ssi* {
		replace `var' = "" if `var' == "–"
		destring `var', replace
	}
	reshape long snap_ssi_, i(state) j(year)
	ren snap_ssi_ snap_ssi
	drop if mi(snap_ssi)
	save "$intermediate/state_year_admin_data_usda", replace
end

program load_population_data // read in state-year population aggregates from Census
	// download here: https://www.census.gov/data/datasets/time-series/demo/popest/2010s-state-total.html
	// downloaded on May 26, 2025
	import excel using "$raw/nst-est2019-01.xlsx", clear  // 2010-2019
	
	* cleaning
	keep A D E F G H I J  // state variable plus popoulations in 2010-2016
	drop if _n < 10 | _n > 60  // drop header and footer rows
	ren (A D E F G H I J) ///
		(state pop_2010 pop_2011 pop_2012 pop_2013 pop_2014 pop_2015 pop_2016)
	destring pop_2010, replace
	replace state = subinstr(state, ".", "",.)
	save "$intermediate/census_state_year_populations_2010_2016", replace
	
	* read 2000-2009 population data and merge 2010-2016
	// download here: https://www.census.gov/data/tables/time-series/demo/popest/intercensal-2000-2010-state.html
	// downloaded on May 26, 2025
	import excel using "$raw/st-est00int-01.xls", clear
	
	keep A C D E F G H I J K L  // state variable plus populations in 2000-2009
	drop if _n < 10 | _n > 60  // drop header and footer rows
	ren (A C D E F G H I J K L) ///
		(state pop_2000 pop_2001 pop_2002 pop_2003 pop_2004 pop_2005 pop_2006 ///
		pop_2007 pop_2008 pop_2009)
	destring pop_2000, replace
	replace state = subinstr(state, ".", "",.)
	
	merge 1:1 state using "$intermediate/census_state_year_populations_2010_2016", ///
		assert(3) keep(3) nogen
	
	reshape long pop_, i(state) j(year)
	ren pop_ population
	save "$intermediate/census_state_year_populations_2000_2016", replace
end

program run_regressions
	local state_snap_controls = "i.has_bbce i.has_call i.has_faceini i.has_facerec i.has_oapp i.has_no_fp"
	
	* read in IPUMS data to get CAP treatment status and covariates
	use "$for_analysis/state_for_regressions_actual_ssi", clear
	keep statefip year startyear inter has_* relyr*
	duplicates drop
	
	decode statefip, gen(state)
	replace state = strproper(state)
	
	* merge in USDA data: counts of individuals enrolled in both SNAP and SSI
	merge 1:1 state year using "$intermediate/state_year_admin_data_usda", ///
		assert(1 2 3) keep(3) nogen
	merge 1:1 state year using "$intermediate/census_state_year_populations_2000_2016", ///
		assert(2 3) keep(3) nogen // merge in state populations for weighting
	drop state
	gunique state  // only California is missing (not present in USDA data)
	tab year
	
	qui su snap_ssi if relyr == -1, d  // baseline participation rate in treated states
	global base_state = `r(mean)'
	di "baseline SNAP-SSI count: `r(mean)'"
	
	* regressions: state-level DiD
	reghdfe snap_ssi inter [aw = population], ///
		a(i.statefip i.year) vce(cl statefip) // no controls
	est sto reg1
	qui estadd local statecont " ", replace
	qui estadd local stateFE "\checkmark", replace
	qui estadd local yearFE "\checkmark", replace
	global beta_reg1 = _b[inter] / $base_state
	
	reghdfe snap_ssi inter `state_snap_controls' [aw = population], ///
		a(i.statefip i.year) vce(cl statefip) // state controls
	est sto reg2
	qui estadd local statecont "\checkmark", replace
	qui estadd local stateFE "\checkmark", replace
	qui estadd local yearFE "\checkmark", replace
	global beta_reg2 = _b[inter] / $base_state
	
	* generate table
	esttab reg1 reg2 ///
		using "$output/admin_data_reg", replace ///
		keep(inter) order(inter) ///
		varlabels(inter "Treated x Post") ///
		se(%5.3f) star(* 0.10 ** 0.05 *** 0.01) stats(yearFE stateFE statecont N, ///
		labels("Year FE" "State FE" "State Controls" "N" ) ///
		fmt("%9.0fc" "%9.0fc" "%9.0fc" "%9.0fc" "%9.0fc" "%9.0fc")) scalars() ///
		nomtitles label nonote booktabs
		
	* regressions: dynamic
	local omit = 6
	reghdfe snap_ssi ib`omit'.relyr_pos ///
		[w = population], a(i.statefip i.year) vce(cl statefip)
	est sto admin_reg
	coefplot (admin_reg, ciopts(recast(rcap) lcolor(blue)) mcolor(blue)), ///
		keep(2.relyr_pos 3.relyr_pos 4.relyr_pos 5.relyr_pos 6.relyr_pos 7.relyr_pos ///
		8.relyr_pos 9.relyr_pos 10.relyr_pos 11.relyr_pos 12.relyr_pos) ///
		vertical xscale(titlegap(2)) xline(5.5, lp(dash) lcolor(gs12)) ///
		yline(0, lwidth(thin) lp(dash) lcolor(black)) ylab(#7, angle(0)) graphregion(fcolor(white) ///
		lcolor(white) lwidth(vvvthin)) ciopts(lwidth(*1.5) lcolor(black)) mcolor(black) ///
		xtitle("Years Since CAP Introduced") ytitle("Estimate and 95% CI") baselevel
	graph export "$output/dynamic_admin_data_reg.png", replace
end

program percent_effects
	di "baseline SNAP-SSI count: $base_state"

	di "percent effects:"
	* twfe
	matrix percent_twfe = ${beta_reg1}, ${beta_reg2}
	mat li percent_twfe
end

* Execute
main
